--- %%NOBANNER%% -->
![]() | ![]() |
/*------------------<--- Start of Description -->--------------------\
| PERFORM POLYNOMIAL REGRESSION ANALYSIS ON XVAR WITH THE OPTION OF  |
| FIRST ELIMINATING OUTLIERS.  PROVIDE DESCRIPTIVE STATISTICS ON XVAR|
| AND YVAR, AND PERFORM TESTS OF MODELING ASSUMPTIONS.               |
|--------------------<--- End of Description -->---------------------|
|--------------------------------------------------------------------|
|--------------<--- Start of Files or Arguments Needed -->-----------|
| PARAMETERS : DSN  - INPUT DATA SET NAME.                           |
|              OPT  - OUTLIER ELIMINATION OPTION.                    |
|                     OFFORD - FLAG OUTLIERS BUT INCLUDE IN          |
|                              ANALYSIS.                             |
|                     OBRIEN - FLAG OUTLIERS AND EXCLUDE FROM        |
|                              ANALYSIS (CAUTION: MAY NOT BE         |
|                              APPROPRIATE WITH 'SMALL'              |
|                              DATASETS).                            |
|              XVAR - NAME OF X VARIABLE.                            |
|              YVAR - NAME OF Y VARIABLE.                            |
|              ID   - NAME OF ID VARIABLE (I.E. CLINIC).             |
|              C1   - X INTERVAL CUT-OFF POINTS (I.E. 5 10 15).      |
|                     IF OMITTED, MACRO WILL CHOOSE 4 INTERVALS      |
|                     BASED ON RANGE OF X VALUES.                    |
|                                                                    |
| PROCESSING : OUTLIERS - A VALUE IS FLAGGED AS AN OUTLIER IF        |
|              IT IS MORE THAN ONE STANDARD DEVIATION FROM THE       |
|              NEXT HIGHEST (LOWEST) VALUE.  (AT MOST 5 VALUES       |
|              ARE CHECKED AT EACH END.)                             |
|                                                                    |
| OUTPUT     : PAGE 1 -                                              |
|              DESCRIPTIVE STATISTICS -                              |
|                EXTREME VALUES WITH OUTLIERS FLAGGED.  MEANS &      |
|                STD DEVS ARE PROVIDED FOR EACH X INTERVAL AND       |
|                OVERALL.  P-VALUES FOR ANOVA & SPEARMAN'S TESTS.    |
|              POLYNOMIAL REGRESSION ANALYSIS -                      |
|                POLYNOMIAL COEFFICIENTS FOR LINEAR THRU QUINTIC     |
|                MODELS ALONG WITH R-SQUARE AND P VALUES FOR         |
|                MODEL COMPARISONS.                                  |
|              TESTS OF MODELING ASSUMPTIONS -                       |
|                SKEWNESS, KURTOSIS, NORMALITY, SPEARMAN AND         |
|                LEVENE TESTS ARE PERFORMED ON EACH OF THE MODELS.   |
|              PAGE 2 -                                              |
|                PLOT OF Y VS. X AND MEAN-Y VS. MEAN-X.              |
|                                                                    |
| ERROR MSG  : MESSAGES ARE OUTPUT IF AN INPUT PARAMETER IS          |
|              MISSING OR OUT OF RANGE.                              |
|---------------<--- End of Files or Arguments Needed -->------------|
|--------------------------------------------------------------------|
|----------------<--- Start of Example and Usage -->-----------------|
| Usage: %REGRESS(DSN,OPT,XVAR,YVAR,ID,C1);                          |
\-------------------<--- End of Example and Usage -->---------------*/
%MACRO REGRESS(DSN,OPT,XVAR,YVAR,ID,C1);
/*--------------------------------------------\
| Author: C. D. STERTZ & S. L. DAOOD;         |
| Created: 8/26/88                            |
| Purpose: POLYNOMIAL REGRESSION ANALYSIS;    |
\--------------------------------------------*/
/*---------------------------------------------------------------------
  REVIEW NOTES
  1. B0 back-transformation: the quintic term is now subtracted. The
     constant term of SUM c(k)*(x-m)**k is SUM c(k)*(-m)**k, so odd
     powers of the mean carry a minus sign.
  2. The mean of X is parenthesised wherever it is raised to a power or
     subtracted, so a negative mean is not mis-parsed (in SAS the
     expression -3.5**2 evaluates to -12.25).
  3. Three spans of this source had been destroyed by markup stripping
     (text between a less-than sign followed by a letter and the next
     greater-than sign). They are reconstructed below and each one is
     tagged NOTE(REVIEW); confirm against an authoritative copy.
  4. Character variables whose width used to depend on blank-padded
     literals now get explicit LENGTH statements.
  5. STD_CHK generates its ten neighbour-gap tests with macro loops
     instead of hand-unrolled copies; the generated statements are the
     same.
---------------------------------------------------------------------*/
%GLOBAL QUIT I N4 NOBSERV;
OPTIONS DQUOTE;

/* ---- parameter validation: any failure jumps to the ENDUP label ---- */
%IF &DSN= %THEN %DO;
  %PUT "ERROR: NO DATASET NAME WAS SUPPLIED IN PARM LIST.";
  %GO TO ENDUP;
%END;
%IF &OPT= %THEN %DO;
  %PUT "ERROR: NO OPTION (OBRIEN/OFFORD) WAS SUPPLIED.";
  %GO TO ENDUP;
%END;
%IF ^(&OPT=OBRIEN | &OPT=OFFORD) %THEN %DO;
  %PUT "ERROR: OPTION MUST BE OBRIEN OR OFFORD.";
  %GO TO ENDUP;
%END;
%IF &XVAR= %THEN %DO;
  %PUT "ERROR: NO X VARIABLE SUPPLIED";
  %GO TO ENDUP;
%END;
%IF &YVAR= %THEN %DO;
  %PUT "ERROR: NO Y VARIABLE SUPPLIED";
  %GO TO ENDUP;
%END;

%LET _TIT= ;
%IF &OPT=OBRIEN %THEN %LET _TIT="OUTLIERS ELIMINATED";

**** COUNT & ELIMINATE MISSINGS ****;
DATA _DSN;
  SET &DSN END=EOF;
  KEEP &ID &XVAR &YVAR;
  RETAIN DEL_CNT 0;
  IF (&XVAR=. | &YVAR=.) THEN DEL_CNT = DEL_CNT+1;
  IF (EOF) THEN DO;
    DELC = PUT(DEL_CNT,3.);
    CALL SYMPUT('_DELS',DELC);
  END;
  IF (&XVAR=. | &YVAR=.) THEN DELETE;

/* No cut points supplied: derive three cut points (four intervals)
   from the observed range of X and store them in C1.               */
%IF &C1= %THEN %DO;
  %LET NUM=4;
  DATA _NULL_;
    SET _DSN END=EOF;
    RETAIN BOTTOM TOP;
    LENGTH TT1 TT2 TT3 $5 NEWVAL $17;
    IF (_N_ =1) THEN DO;
      BOTTOM=&XVAR;
      TOP=&XVAR;
    END;
    /* NOTE(REVIEW): the two comparisons below are reconstructed; the
       source read IF (&XVARTOP) after markup stripping.             */
    IF (&XVAR<BOTTOM) THEN BOTTOM=&XVAR;
    IF (&XVAR>TOP) THEN TOP=&XVAR;
    IF (EOF) THEN DO;
      INTERVAL=(TOP - BOTTOM)/4;
      T1=CEIL(BOTTOM + INTERVAL);
      T2=CEIL(T1 + INTERVAL);
      T3=CEIL(T2 + INTERVAL);
      TT1=PUT(T1,Z5.);
      TT2=PUT(T2,Z5.);
      TT3=PUT(T3,Z5.);
      NEWVAL=TT1 || ' ' || TT2 || ' ' || TT3;
      CALL SYMPUT('C1',NEWVAL);
    END;
%END;

/* Build powers of X and assign each observation an interval label
   (GROUP) plus a sort key (WEIGH: 0 = below first cut, 1 = interior,
   2 = at or above last cut).  The observation count goes to QUIT.   */
DATA _DSN(KEEP=&ID GROUP WEIGH X X2 X3 X4 X5 Y);
  SET _DSN END=EOF;
  RETAIN COUNT 0 DEL_CNT 0;
  /* wide enough for two 5-digit cut points plus the relation text */
  LENGTH GROUP $16;
  GROUP=" ";
  %LET BE=X;
  %LET I=1;
  X = &XVAR;
  X2 = X*X;
  X3 = X2*X;
  X4 = X3*X;
  X5 = X4*X;
  Y = &YVAR;
  COUNT = COUNT + 1;
  %DO %UNTIL(&BE= );
    %LET BI=%SCAN(&C1,&I,' ');
    %LET BE=%SCAN(&C1,&I+1,' ');
    %IF &I=1 %THEN %DO;
      IF &XVAR <&BI THEN DO;
        GROUP=" X<&BI";
        WEIGH=0;
      END;
    %END;
    %IF &BE^= %THEN %DO;
      %IF &BE<&BI %THEN %DO;
        PUT "ERROR: NON-INCREASING INTERVALS IN PARM LIST";
        %GO TO ENDUP;
      %END;
      IF &BI <= &XVAR < &BE THEN DO;
        GROUP="&BI<=X<&BE";
        WEIGH=1;
      END;
    %END;
    %ELSE %DO;
      IF &XVAR>=&BI THEN DO;
        GROUP="&BI<=X ";
        WEIGH=2;
      END;
    %END;
    %LET I=%EVAL(&I+1);
  %END;
  OUTPUT _DSN;
  %LET NUM=%EVAL(&I);
  IF EOF THEN DO;
    CC = PUT(COUNT,6.);
    CALL SYMPUT('QUIT',CC);
  END;

/* The DATA statement below is a step boundary, so the step above has
   run and QUIT is populated before the macro test on QUIT is made.  */
DATA _DSN;
  SET _DSN END=EOF;
%IF &QUIT>=6 %THEN %DO;
  RETAIN COUNT 0;
  COUNT = COUNT + 1;
  ID = &ID;
  /* interval indicator variables G1..Gn, used later by PROC RSQUARE */
  %LET I=1;
  %DO II=1 %TO &NUM;
    G&I=0;
    %LET I=%EVAL(&I+1);
  %END;
  %LET BE=X;
  %LET I=1;
  %DO %UNTIL(&BE= );
    %LET BI=%SCAN(&C1,&I,' ');
    %LET BE=%SCAN(&C1,&I+1,' ');
    %IF &I=1 %THEN %DO;
      %LET II=1;
      IF X < &BI THEN G&II=1;
    %END;
    %LET II=%EVAL(&II+1);
    %IF &BE^= %THEN %DO;
      %IF &BE<=&BI %THEN %DO;
        PUT "ERROR: NON-INCREASING INTERVALS IN PARM LIST";
        %GO TO ENDUP;
      %END;
      IF &BI <= X < &BE THEN G&II=1;
    %END;
    %ELSE %DO;
      IF X >= &BI THEN G&II=1;
    %END;
    %LET I=%EVAL(&I+1);
  %END;
  /* store the row count in NOBSERV with leading zeros stripped */
  IF EOF THEN DO;
    NN = PUT(COUNT,Z5.);
    DO I=1 TO 5;
      IF (SUBSTR(NN,I,1)^='0') THEN DO;
        INDEX = I;
        I=5;
      END;
    END;
    NNN = SUBSTR(NN,INDEX,5-INDEX+1);
    CALL SYMPUT('NOBSERV',NNN);
  END;

  **** DETERMINE AND THROW OUT OUTLIERS IF REQUESTED ****;

  /* Sample standard deviation of OBS(START..END); result in STDEV. */
  %MACRO STD_DEV;
    SM = 0;
    SM_SQ = 0;
    DO I=START TO END;
      SM = SM + OBS(I);
      SM_SQ = SM_SQ + (OBS(I)**2);
    END;
    SQ_SM = SM**2 / (END - START + 1);
    S2 = (SM_SQ - SQ_SM) / (END - START);
    STDEV = SQRT(S2);
  %MEND STD_DEV;

  /* OBS holds the values sorted descending.  Walk up to five
     neighbour gaps from the high end, then up to five from the low
     end.  When a gap exceeds the current STDEV, every value outside
     the gap is flagged and STDEV is recomputed on what remains.
     OUTL(1-5) flag the five highest values, OUTL(10) down to OUTL(6)
     flag the lowest value upward.                                   */
  %MACRO STD_CHK;
    %LOCAL K J M;
    START = 1;
    END = &NOBSERV;
    %STD_DEV;
    %DO K=1 %TO 5;
      IF (&NOBSERV>=%EVAL(&K+1) & ABS(OBS(&K) - OBS(%EVAL(&K+1))) > STDEV) THEN DO;
        %DO M=1 %TO &K;
          OUTL(&M) = '*';
        %END;
        START = %EVAL(&K+1);
        %STD_DEV;
      END;
    %END;
    %DO J=1 %TO 5;
      IF (&NOBSERV>=%EVAL(&J+6) & ABS(OBS(&NOBSERV-%EVAL(&J-1)) - OBS(&NOBSERV-&J)) > STDEV) THEN DO;
        %DO M=10 %TO %EVAL(11-&J) %BY -1;
          OUTL(&M) = '*';
        %END;
        END = &NOBSERV - &J;
        IF (END > START) THEN DO;
          %STD_DEV;
        END;
      END;
    %END;
  %MEND STD_CHK;

  /* For variable L (X or Y): sort descending, transpose the values
     and their IDs into one wide row, run the outlier check, and keep
     only the five highest and five lowest values with their flags.  */
  %MACRO OUTLR(L);
    PROC SORT DATA=_DSN;
      BY DESCENDING &L;
    PROC TRANSPOSE DATA=_DSN OUT=_TRANSID PREFIX=ID&L._;
      VAR ID;
    DATA _TRANSID;
      SET _TRANSID;
      %LET N4 = 6;
      %IF (&NOBSERV > 10) %THEN %DO;
        %LET N4 = %EVAL(&NOBSERV-4);
      %END;
      KEEP ID&L._1-ID&L._5 ID&L._&N4-ID&L._&NOBSERV;
    PROC TRANSPOSE DATA=_DSN OUT=_TRANS PREFIX=&L._;
      VAR &L.;
    DATA _OUTL&L;
      SET _TRANS;
      LENGTH OUTL&L._1-OUTL&L._10 $1.;
      ARRAY OBS(*) &L._1-&L._&NOBSERV;
      ARRAY OUTL(10) OUTL&L._1-OUTL&L._10;
      %LET N4 = 6;
      %IF (&NOBSERV > 10) %THEN %DO;
        %LET N4 = %EVAL(&NOBSERV-4);
      %END;
      KEEP &L._1-&L._5 &L._&N4-&L._&NOBSERV OUTL&L._1-OUTL&L._10;
      %STD_CHK;
    DATA _MNMX&L;
      IF _N_=1 THEN SET _TRANSID;
      SET _OUTL&L;
  %MEND OUTLR;

  **** REMOVE OUTLYING X ****;
  %OUTLR(X);
  **** REMOVE OUTLYING Y ****;
  %OUTLR(Y);

  /* Attach the extreme-value row to every observation; with option
     OBRIEN drop any observation whose X or Y equals a flagged value. */
  DATA _T_DAT;
    IF _N_=1 THEN DO;
      MERGE _MNMXX _MNMXY;
    END;
    SET _DSN;
    KEEP ID X Y WEIGH GROUP X2-X5 G1-G&NUM;
    %LET N4 = %EVAL(&NOBSERV-4);
    %LET N3 = %EVAL(&NOBSERV-3);
    %LET N2 = %EVAL(&NOBSERV-2);
    %LET N1 = %EVAL(&NOBSERV-1);
    %IF (&OPT = OBRIEN) %THEN %DO;
      IF ((X=X_1 & OUTLX_1='*') |
          (X=X_2 & OUTLX_2='*') |
          (X=X_3 & OUTLX_3='*') |
          (X=X_4 & OUTLX_4='*') |
          (X=X_5 & OUTLX_5='*') |
          (X=X_&N4 & OUTLX_6='*') |
          (X=X_&N3 & OUTLX_7='*') |
          (X=X_&N2 & OUTLX_8='*') |
          (X=X_&N1 & OUTLX_9='*') |
          (X=X_&NOBSERV & OUTLX_10='*') |
          (Y=Y_1 & OUTLY_1='*') |
          (Y=Y_2 & OUTLY_2='*') |
          (Y=Y_3 & OUTLY_3='*') |
          (Y=Y_4 & OUTLY_4='*') |
          (Y=Y_5 & OUTLY_5='*') |
          (Y=Y_&N4 & OUTLY_6='*') |
          (Y=Y_&N3 & OUTLY_7='*') |
          (Y=Y_&N2 & OUTLY_8='*') |
          (Y=Y_&N1 & OUTLY_9='*') |
          (Y=Y_&NOBSERV & OUTLY_10='*')) THEN DELETE;
    %END;

  PROC SORT DATA=_T_DAT;
    BY WEIGH GROUP;

  /* One row per populated interval; NUM becomes the number of
     populated intervals (leading zero stripped).                    */
  DATA _GRPS;
    SET _T_DAT END=EOF;
    BY WEIGH GROUP;
    RETAIN G_NUM 0;
    KEEP _GRPS;
    IF FIRST.GROUP THEN DO;
      _GRPS=GROUP;
      G_NUM= G_NUM + 1;
      OUTPUT;
    END;
    IF EOF THEN DO;
      NN = PUT(G_NUM,Z2.);
      DO I=1 TO 2;
        IF (SUBSTR(NN,I,1)^='0') THEN DO;
          INDEX = I;
          I=2;
        END;
      END;
      NNN = SUBSTR(NN,INDEX,2-INDEX+1);
      CALL SYMPUT('NUM',NNN);
    END;

  /* Pad the label list with blank rows up to 10, then a TOTAL row,
     so it lines up with the fixed print layout.                     */
  DATA _GRPS;
    SET _GRPS END=EOF;
    KEEP _GRPS;
    OUTPUT;
    IF EOF THEN DO;
      CT=_N_;
      _GRPS=" ";
      DO WHILE(CT<10);
        CT = CT + 1;
        OUTPUT;
      END;
      _GRPS=' TOTAL';
      OUTPUT;
    END;

  /* Record how many rows the outlier pass removed (_OUTS) and reset
     NOBSERV to the surviving row count.                             */
  DATA _DSN;
    SET _T_DAT END=EOF;
    IF (EOF) THEN DO;
      OUTS = &NOBSERV - _N_;
      OUTSPUT = PUT(OUTS,3.);
      CALL SYMPUT('_OUTS',OUTSPUT);
      NN = PUT(_N_,Z5.);
      DO I=1 TO 5;
        IF (SUBSTR(NN,I,1) ^= '0') THEN DO;
          INDEX = I;
          I = 5;
        END;
      END;
      NNN = SUBSTR(NN,INDEX,5-INDEX+1);
      CALL SYMPUT('NOBSERV',NNN);
    END;
  %LET KK = %EVAL(&NUM-1);

  **** CALCULATE GROUP & TOTAL STATS ****;
  PROC SORT DATA=_DSN;
    BY GROUP WEIGH;
  PROC MEANS NOPRINT DATA=_DSN;
    BY GROUP WEIGH;
    VAR Y X;
    OUTPUT OUT=_GRPSTAT MEAN=MN_Y MN_X STD=STD_Y N=N;
  PROC SORT DATA=_GRPSTAT;
    BY WEIGH GROUP;
  PROC MEANS NOPRINT DATA=_DSN;
    VAR Y X;
    OUTPUT OUT=_TOTSTAT(KEEP=MN_Y MN_X STD_Y STD_X CSS_Y CSS_X N)
           MEAN=MN_Y MN_X STD=STD_Y STD_X CSS=CSS_Y CSS_X N=N;
  DATA _NULL_;
    SET _TOTSTAT;
    CALL SYMPUT('MEANX', MN_X);
  DATA _MEANS;
    SET _GRPSTAT(IN=INGP) _TOTSTAT(IN=INSMN);
    DROP CT SAVE1-SAVE7;
    IF INGP THEN OUTPUT;
    IF INSMN THEN DO;
      SAVE1=N;
      SAVE2=MN_Y;
      SAVE3=MN_X;
      SAVE4=STD_Y;
      SAVE5=STD_X;
      SAVE6=CSS_Y;
      SAVE7=CSS_X;
      N=.;
      MN_Y=.;
      MN_X=.;
      STD_Y=.;
      STD_X=.;
      CSS_Y=.;
      CSS_X=.;
      GROUP=' ';
      CT=_N_;
      DO WHILE (CT<=10);
        OUTPUT;
        CT=CT + 1;
      END;
      GROUP=' TOTAL';
      N=SAVE1;
      MN_Y=SAVE2;
      MN_X=SAVE3;
      STD_Y=SAVE4;
      STD_X=SAVE5;
      CSS_Y=SAVE6;
      CSS_X=SAVE7;
      OUTPUT;
    END;

  **** TRANSFORM X VALUES AROUND MEAN ****;
  DATA _DSNSAV;
    SET _DSN;
  DATA _DSN;
    SET _DSN;
    X = X - (&MEANX);
    X2 = X * X;
    X3 = X2 * X;
    X4 = X3 * X;
    X5 = X4 * X;

  **** FIT POLYNOMIAL MODELS - OUTPUT COEFF & RESIDUALS ****;
  PROC SORT DATA=_DSN;
    BY X Y ID;
  PROC REG DATA=_DSN NOPRINT;
    MEAN: MODEL Y=;
    OUTPUT OUT=_RES_0 R=R_0;
  PROC REG DATA=_DSN NOPRINT OUTEST=_OUTREGR;
    LINEAR: MODEL Y=X;
    OUTPUT OUT=_RES_1 R=R_1;
    QUADRATC: MODEL Y=X X2;
    OUTPUT OUT=_RES_2 R=R_2;
    CUBIC: MODEL Y=X X2 X3;
    OUTPUT OUT=_RES_3 R=R_3;
    QUARTIC: MODEL Y=X X2 X3 X4;
    OUTPUT OUT=_RES_4 R=R_4;
    QUINTIC: MODEL Y=X X2 X3 X4 X5;
    OUTPUT OUT=_RES_5 R=R_5;

  **** ANOVA ON Y & MODIFIED LEVENE TEST ****;
  DATA _LEV;
    MERGE _RES_0 _RES_1 _RES_2 _RES_3 _RES_4 _RES_5;
    BY X Y ID;
  PROC UNIVARIATE NOPRINT DATA=_LEV;
    VAR R_0 R_1 R_2 R_3 R_4 R_5;
    OUTPUT OUT=R_STAT
           MEDIAN=LMD_0 LMD_1 LMD_2 LMD_3 LMD_4 LMD_5
           MEAN = MNR_0 MNR_1 MNR_2 MNR_3 MNR_4 MNR_5
           STD = STDR_0 STDR_1 STDR_2 STDR_3 STDR_4 STDR_5;
  /* LEV_n: absolute deviation of each residual from its median.
     Z_n: normal CDF of the standardised residual, computed only for
     samples larger than 50 (input to the A2 statistic further down). */
  DATA _LEV;
    IF _N_=1 THEN SET R_STAT;
    SET _LEV;
    LEV_0 = ABS(R_0 - LMD_0);
    LEV_1 = ABS(R_1 - LMD_1);
    LEV_2 = ABS(R_2 - LMD_2);
    LEV_3 = ABS(R_3 - LMD_3);
    LEV_4 = ABS(R_4 - LMD_4);
    LEV_5 = ABS(R_5 - LMD_5);
    IF (&NOBSERV>50) THEN DO;
      Z_0 = PROBNORM((R_0 - MNR_0)/STDR_0);
      Z_1 = PROBNORM((R_1 - MNR_1)/STDR_1);
      Z_2 = PROBNORM((R_2 - MNR_2)/STDR_2);
      Z_3 = PROBNORM((R_3 - MNR_3)/STDR_3);
      Z_4 = PROBNORM((R_4 - MNR_4)/STDR_4);
      Z_5 = PROBNORM((R_5 - MNR_5)/STDR_5);
    END;

  /* Emits the list of interval indicators G1 ... G(KK). */
  %MACRO GG;
    %DO I=1 %TO &KK;
      G&I
    %END;
  %MEND GG;

  PROC RSQUARE NOPRINT DATA=_LEV OUTEST=_OUTRSQ SSE MSE;
    MODEL Y = %GG / INCLUDE=&KK;
    MODEL LEV_0 = %GG / INCLUDE=&KK;
    MODEL LEV_1 = %GG / INCLUDE=&KK;
    MODEL LEV_2 = %GG / INCLUDE=&KK;
    MODEL LEV_3 = %GG / INCLUDE=&KK;
    MODEL LEV_4 = %GG / INCLUDE=&KK;
    MODEL LEV_5 = %GG / INCLUDE=&KK;
  /* Row 1 is the ANOVA of Y on the intervals; rows 2-7 are the same
     F test applied to LEV_0 through LEV_5.                          */
  DATA _AONY _LEV_P;
    SET _OUTRSQ;
    SSR = _RSQ_ * _SSE_ / (1 - _RSQ_);
    MSB = SSR / (&NUM-1);
    F = MSB / _MSE_;
    P = 1 - PROBF(F,&NUM-1,&NOBSERV-&NUM);
    IF _N_=1 THEN OUTPUT _AONY;
    IF _N_^=1 THEN OUTPUT _LEV_P;

  **** NORMALITY ****;
  /* Four slash-separated lists of 48 values each.
     NOTE(REVIEW): presumably Shapiro-Wilk W critical values for
     N = 3 to 50 at p = .01, .02, .05 and .10 respectively - confirm
     against a published table.                                      */
  %LET W_P_VAL1 = .753/.687/.686/
    .713/.730/.749/.764/.781/ .792/.805/.814/.825/.835/
    .844/.851/.858/.863/.868/ .873/.878/.881/.884/.888/
    .891/.894/.896/.898/.900/ .902/.904/.906/.908/.910/
    .912/.914/.916/.917/.919/ .920/.922/.923/.924/.926/
    .927/.928/.929/.929/.930;
  %LET W_P_VAL2 = .756/.707/.715/
    .743/.760/.778/.791/.806/ .817/.828/.837/.846/.855/
    .863/.869/.874/.879/.884/ .888/.892/.895/.898/.901/
    .904/.906/.908/.910/.912/ .914/.915/.917/.919/.920/
    .922/.924/.925/.927/.928/ .929/.930/.932/.933/.934/
    .935/.936/.937/.937/.938;
  %LET W_P_VAL3 = .767/.748/.762/
    .788/.803/.818/.829/.842/ .850/.859/.866/.874/.881/
    .887/.892/.897/.901/.905/ .908/.911/.914/.916/.918/
    .920/.923/.924/.926/.927/ .929/.930/.931/.933/.934/
    .935/.936/.938/.939/.940/ .941/.942/.943/.944/.945/
    .945/.946/.947/.947/.947;
  %LET W_P_VAL4 = .789/.792/.806/
    .826/.838/.851/.859/.869/ .876/.883/.889/.895/.901/
    .906/.910/.914/.917/.920/ .923/.926/.928/.930/.931/
    .933/.935/.936/.937/.939/ .940/.941/.942/.943/.944/
    .945/.946/.947/.948/.949/ .950/.951/.951/.952/.953/
    .953/.954/.954/.955/.955;

  /* Normality p-value band for the residuals of each model.
     N of 50 or fewer: W statistic against the tabled values.
     N above 50: an A2 statistic computed from the sorted Z values.  */
  %MACRO NORM;
    %LOCAL WIDX W1 W2 W3 W4;
    %IF (&NOBSERV<=50) %THEN %DO;
      PROC UNIVARIATE NOPRINT DATA=_LEV;
        VAR R_0 R_1 R_2 R_3 R_4 R_5;
        OUTPUT OUT=_NORML NORMAL=W_0 W_1 W_2 W_3 W_4 W_5;
      /* NOTE(REVIEW): in the source, everything from the second IF of
         PN through the header of the N above 50 branch had been
         stripped.  The band labels, the table lookup and the DATA
         step that applies PN are reconstructed - confirm.           */
      %MACRO PN;
        %DO I=0 %TO 5;
          LENGTH PNORM_&I $11;
          PNORM_&I = ' P>.10';
          IF (W_&I <= &W4) THEN PNORM_&I = ' .05<P<=.10';
          IF (W_&I <= &W3) THEN PNORM_&I = ' .02<P<=.05';
          IF (W_&I <= &W2) THEN PNORM_&I = ' .01<P<=.02';
          IF (W_&I <= &W1) THEN PNORM_&I = ' P<=.01';
        %END;
      %MEND PN;
      /* table entry k corresponds to N = k + 2 */
      %LET WIDX = %EVAL(&NOBSERV-2);
      %LET W1 = %SCAN(&W_P_VAL1,&WIDX,%STR(/ ));
      %LET W2 = %SCAN(&W_P_VAL2,&WIDX,%STR(/ ));
      %LET W3 = %SCAN(&W_P_VAL3,&WIDX,%STR(/ ));
      %LET W4 = %SCAN(&W_P_VAL4,&WIDX,%STR(/ ));
      DATA _NORML;
        SET _NORML;
        %PN;
    %END;
    %IF (&NOBSERV > 50) %THEN %DO;
      %DO I=0 %TO 5;
        PROC SORT DATA=_LEV;
          BY Z_&I;
        PROC TRANSPOSE DATA=_LEV OUT=_ZTRAN(DROP=_NAME_) PREFIX=Z_;
          VAR Z_&I.;
        %MACRO A2_CALC;
          ARRAY Z(&NOBSERV) Z_1 - Z_&NOBSERV;
          LENGTH PNORM_&I $11;
          A2_&I. = 0;
          DO J=1 TO &NOBSERV;
            A2_&I = A2_&I + (2*J-1) * (LOG(Z(J)) + LOG(1 - Z(&NOBSERV-J+1)));
          END;
          A2_&I = (-1 * A2_&I / &NOBSERV) - &NOBSERV;
          A2_&I = A2_&I * (1 + .75/&NOBSERV + 2.25/(&NOBSERV**2));
          /* NOTE(REVIEW): only the 1.035 cut-off survived in the
             source.  The .873 / .752 / .631 cut-offs and the band
             labels are reconstructed from the usual critical values
             of the adjusted A2 statistic - confirm.                 */
          IF (A2_&I >= 1.035) THEN PNORM_&I = ' P<=.01';
          IF (A2_&I < 1.035) THEN PNORM_&I = '.01<P<=.025';
          IF (A2_&I < .873) THEN PNORM_&I = '.025<P<=.05';
          IF (A2_&I < .752) THEN PNORM_&I = '.05<P<=.10';
          IF (A2_&I < .631) THEN PNORM_&I = ' P>.10';
        %MEND A2_CALC;
        DATA _ZA2_&I;
          SET _ZTRAN;
          KEEP A2_&I PNORM_&I;
          RETAIN A2_&I;
          %A2_CALC;
      %END;
      DATA _NORML;
        MERGE _ZA2_0 _ZA2_1 _ZA2_2 _ZA2_3 _ZA2_4 _ZA2_5;
    %END;
  %MEND NORM;
  %NORM;

  **** SPEARMAN R ****;
  DATA _RES;
    SET _RES_0 _RES_1 _RES_2 _RES_3 _RES_4 _RES_5;
    ABS_R0 = ABS(R_0);
    ABS_R1 = ABS(R_1);
    ABS_R2 = ABS(R_2);
    ABS_R3 = ABS(R_3);
    ABS_R4 = ABS(R_4);
    ABS_R5 = ABS(R_5);
  PROC CORR SPEARMAN NOSIMPLE NOPRINT DATA=_RES OUTS=_OUTSPR;
    VAR X;
    WITH Y ABS_R0 ABS_R1 ABS_R2 ABS_R3 ABS_R4 ABS_R5;
  /* two-sided t test of each rank correlation (held in column X) */
  DATA _OUTSPR;
    SET _OUTSPR;
    IF (_TYPE_='CORR') THEN DO;
      T = SQRT(&NOBSERV - 2) * X / SQRT(1 - X**2);
      P = (1 - PROBT(ABS(T),&NOBSERV-2)) * 2;
    END;

  **** SKEWNESS, KURTOSIS & NORMALITY COEFFICIENTS & Z ****;
  PROC UNIVARIATE DATA=_RES NOPRINT;
    VAR X Y R_0 R_1 R_2 R_3 R_4 R_5;
    OUTPUT OUT=_COEFF
           N=N_X N_Y N_R0 N_R1 N_R2 N_R3 N_R4 N_R5
           SKEWNESS=SKW_X SKW_Y SKW_R0 SKW_R1 SKW_R2 SKW_R3 SKW_R4 SKW_R5
           KURTOSIS=KRT_X KRT_Y KRT_R0 KRT_R1 KRT_R2 KRT_R3 KRT_R4 KRT_R5
           NORMAL=NML_X NML_Y NML_R0 NML_R1 NML_R2 NML_R3 NML_R4 NML_R5;

  /* Z scores of the residual skewness and kurtosis for models 0-5. */
  %MACRO Z_P;
    %DO I=0 %TO 5;
      Z_SKW_R&I = SKW_R&I / SE_S;
      Z_KRT_R&I = KRT_R&I / SE_K;
    %END;
  %MEND Z_P;

  DATA _ZP;
    SET _COEFF;
    SE_S = SQRT(6 / &NOBSERV);
    SE_K = SQRT(24 / &NOBSERV);
    Z_SKW_X = SKW_X / SE_S;
    Z_KRT_X = KRT_X / SE_K;
    Z_SKW_Y = SKW_Y / SE_S;
    Z_KRT_Y = KRT_Y / SE_K;
    %Z_P;

  **** POLYNOMIAL REGRESSION ANALYSIS ****;
  DATA MEAN_PGA;
    SET _TOTSTAT;
    EDF_MN = &NOBSERV - 1;
    SSE_MN = CSS_Y;
    RMSE_MN = STD_Y;
  DATA _PGANAL;
    IF _N_=1 THEN SET MEAN_PGA;
    SET _OUTREGR;
    IF (X=.) THEN X = 0;
    IF (X2=.) THEN X2 = 0;
    IF (X3=.) THEN X3 = 0;
    IF (X4=.) THEN X4 = 0;
    IF (X5=.) THEN X5 = 0;
    /* Convert coefficients fitted on the centred X back to the raw X
       scale by expanding (x - m)**k.  Signs alternate with the power
       of m, so the quintic term of B0 is subtracted.                */
    B0 = INTERCEP - (X * (&MEANX)) + (X2 * (&MEANX)**2) - (X3 * (&MEANX)**3)
         + (X4 * (&MEANX)**4) - (X5 * (&MEANX)**5);
    B1 = X - (2 * X2 * (&MEANX)) + (3 * X3 * (&MEANX)**2)
         - (4 * X4 * (&MEANX)**3) + (5 * X5 * (&MEANX)**4);
    B2 = X2 - (3 * X3 * (&MEANX)) + (6 * X4 * (&MEANX)**2)
         - (10 * X5 * (&MEANX)**3);
    B3 = X3 - (4 * X4 * (&MEANX)) + (10 * X5 * (&MEANX)**2);
    B4 = X4 - (5 * X5 * (&MEANX));
    B5 = X5;
    /* blank the coefficients that are not part of model number _N_ */
    IF (_N_=1) THEN DO;
      B2=.;
      B3=.;
      B4=.;
      B5=.;
    END;
    IF (_N_=2) THEN DO;
      B3=.;
      B4=.;
      B5=.;
    END;
    IF (_N_=3) THEN DO;
      B4=.;
      B5=.;
    END;
    IF (_N_=4) THEN DO;
      B5=.;
    END;
    %if &sysver=5.18 %then %do;
      _RMSE_ = _SIGMA_;
    %end;
    _EDF_ = &NOBSERV - _N_ - 1;
    _SSE_ = _EDF_ * (_RMSE_**2);
    _RSQ_ = 1 - (_SSE_/SSE_MN);
    L_SSE = LAG(_SSE_);
    IF _N_=1 THEN L_SSE = SSE_MN;
    /* F tests: model i against model i-1, and model i against the mean */
    F_I_1 = (L_SSE - _SSE_) / (_RMSE_**2);
    P_I_1 = 1 - PROBF(F_I_1 , 1 , _EDF_);
    F_0 = ((SSE_MN - _SSE_) / _N_) / (_RMSE_**2);
    P_0 = 1 - PROBF(F_0 , _N_ , _EDF_);

  /* ---- reshape the test results into one-row data sets for printing ---- */
  DATA __AONY;
    SET _AONY;
    KEEP _RMSE_ AONY_P;
    AONY_P=P;
  DATA _SPEAR;
    SET _OUTSPR;
    KEEP X SPEAR_P;
    IF _N_=4;
    SPEAR_P=P;
  DATA _P_SPEAR;
    SET _OUTSPR;
    RETAIN P_R0-P_R5;
    KEEP P_R0-P_R5;
    IF _N_=5 THEN P_R0=P;
    ELSE IF _N_=6 THEN P_R1=P;
    ELSE IF _N_=7 THEN P_R2=P;
    ELSE IF _N_=8 THEN P_R3=P;
    ELSE IF _N_=9 THEN P_R4=P;
    ELSE IF _N_=10 THEN P_R5=P;
    IF _N_=10 THEN OUTPUT;
  DATA _WOW;
    MERGE _MNMXX _MNMXY _GRPS _MEANS __AONY _SPEAR;
  DATA __LEV_P;
    SET _LEV_P;
    KEEP LEVP0-LEVP5;
    RETAIN LEVP0-LEVP5;
    IF _N_=1 THEN LEVP0=P;
    ELSE IF _N_=2 THEN LEVP1=P;
    ELSE IF _N_=3 THEN LEVP2=P;
    ELSE IF _N_=4 THEN LEVP3=P;
    ELSE IF _N_=5 THEN LEVP4=P;
    ELSE IF _N_=6 THEN LEVP5=P;
    IF _N_=6 THEN OUTPUT;

  /************************************************************/
  /* PRINT RESULTS ON SPECIFIED FORMAT                        */
  /************************************************************/
  TITLE7 "P O L Y N O M I A L R E G R E S S I O N " ;
  DATA _NULL_;
    SET _WOW;
    /* wide enough for the longest model label, QUADRATIC(2) */
    LENGTH _MODEL $12;
    FILE PRINT N=PS;
    PUT #1 @1 130*"="
        #2 @1 "| DESCRIPTIVE STATISTICS |" @30 &_TIT
        #3 @2 "------------------------"
           @35 "# OF OBS DELETED DUE TO : MISSING VALUES = " "&_DELS"
           @95 "DATASET USED: " "&DSN"
        #4 @2 "X=" "&XVAR" @14 "Y=" "&YVAR"
           @35 " : OUTLIERS = " "&_OUTS"
           @95 "OPTION : " "&OPT";
    PUT @1 130*"=";
    PUT @5 "EXTREME VALUES (* INDICATES OUTLIER)" @50 "|" @53 "X INTERVAL"
        @69 "N" @77 "MEAN X" @89 "MEAN Y" @102 "STD Y" @108 "|";
    PUT @6 "X" @ 31"Y" @50 "|" @52 _GRPS $CHAR12. @65 N 6. @73 MN_X 10.2
        @85 MN_Y 10.2 @97 STD_Y 10.2 @108 "|" @115 "ANOVA ON Y" /
        @4 "HIGH" @12 "&ID" @29 "HIGH" @37 "&ID" @50 "|"
        @108 "| USING X INTERVALS" /
        @2 X_1 8.3 @10 "(" @11 IDX_1 10. @21 ")" @23 OUTLX_1 $1.
        @27 Y_1 8.3 @35 "(" @36 IDY_1 10. @46 ")" @48 OUTLY_1 $1.
        @50 "|" @108 "| SQRT(MSE)=" @120 _RMSE_ 10.2 /
        @2 X_2 8.3 @10 "(" @11 IDX_2 10. @21 ")" @23 OUTLX_2 $1.
        @27 Y_2 8.3 @35 "(" @36 IDY_2 10. @46 ")" @48 OUTLY_2 $1.
        @50 "|" @108 "| P =" @120 AONY_P 10.3 /
        %DO I=3 %TO 5;
          @2 X_&I 8.3 @10 "(" @11 IDX_&I 10. @21 ")" @23 OUTLX_&I $1.
          @27 Y_&I 8.3 @35 "(" @36 IDY_&I 10. @46 ")" @48 OUTLY_&I $1.
          @50 "|" @108 "|" /
        %END;
        %DO I=1 %TO 1;
          @4 "LOW" @29 "LOW" @50 "|" @108 "|" /
        %END;
        %LET I=6;
        %LET II=%EVAL(&I);
        %LET OLDN = %EVAL(&NOBSERV + &_OUTS);
        %DO I=&N4 %TO &OLDN %BY 1;
          @2 X_&I 8.3 @10 "(" @11 IDX_&I 10. @21 ")" @23 OUTLX_&II $1.
          @27 Y_&I 8.3 @35 "(" @36 IDY_&I 10. @46 ")" @48 OUTLY_&II $1.
          @50 "|" @108 "|" /
          %LET II=%EVAL(&II+1);
        %END;
        ;
    PUT #12 @109 22*"-" //
        @110 "SPEARMANS (Y ON X)"/
        @114 "R=" X 10.2 /
        @114 "P=" SPEAR_P 10.3;
    /* interval rows: the first row was printed above, rows 2-11 here */
    PN=8;
    DO I=1 TO 11;
      SET _WOW;
      IF I=11 THEN DO;
        PUT #PN @53 "---------" @65 "------" @73 "----------"
            @85 "----------" @97 "----------" ;
        PN=PN+1;
      END;
      IF I>1 THEN DO;
        PUT #PN @52 _GRPS $CHAR12. @65 N 6. @73 MN_X 10.2 @85 MN_Y 10.2
            @97 STD_Y 10.2 ;
        PN=PN+1;
      END;
    END;
    PUT #20 @1 130*"=";
    PUT @1 "| POLYNOMIAL REGRESSION ANALYSIS |"/
        @2 "--------------------------------" @117 "P-VALUE" /
        @43 "COEFFICIENTS" @87"ERROR" @112 "MODEL(I) MODEL(I)"/
        @16 65*"-" @83 14*"-" @104 "2" @114 "VS VS"/
        @2 "MODEL(I)" @24 "B0" @35 "B1" @46 "B2" @57 "B3" @68 "B4" @79 "B5"
        @84 "DF" @92 "RMSE" @103 "R" @110 "MODEL(I-1) MODEL(0)"/
        @2 130*"-";
    SET MEAN_PGA;
    PUT @2 "MEAN(0)" @16 MN_Y 10.3 @82 EDF_MN 4. @88 RMSE_MN 8.2;
    DO I=1 TO 5;
      SET _PGANAL(drop=_type_);
      IF _MODEL_="LINEAR" THEN _MODEL="LINEAR(1) ";
      ELSE IF _MODEL_="QUADRATC" THEN _MODEL="QUADRATIC(2)";
      ELSE IF _MODEL_="CUBIC" THEN _MODEL="CUBIC(3)";
      ELSE IF _MODEL_="QUARTIC" THEN _MODEL="QUARTIC(4)";
      ELSE IF _MODEL_="QUINTIC" THEN _MODEL="QUINTIC(5)";
      PUT @2 _MODEL @16 B0 10.3 @27 B1 10.4 @38 B2 10.4 @49 B3 10.5
          @60 B4 10.5 @71 B5 10.5 @82 _EDF_ 4. +2 _RMSE_ 8.2 +2 _RSQ_ 8.3
          @112 P_I_1 8.3 +2 P_0 8.3;
    END;
    PUT @1 130*"=";
    PUT @1 "| TEST OF MODELLING ASSUMPTIONS USING RESIDUALS "
        @49 "FROM THE FOLLOWING MODELS : |";
    PUT @2 75*"-" /
        @39 "MEAN LINEAR QUADRATIC CUBIC " @92 "QUARTIC QUINTIC";
    SET _ZP;
    PUT @3 "SKEWNESS : COEFFICIENT"
        @35 SKW_R0 8.2 +6 SKW_R1 8.2 +6 SKW_R2 8.2 +6 SKW_R3 8.2
        +6 SKW_R4 8.2 +6 SKW_R5 8.2 /
        @18 "Z"
        @35 Z_SKW_R0 8.2 +6 Z_SKW_R1 8.2 +6 Z_SKW_R2 8.2 +6 Z_SKW_R3 8.2
        +6 Z_SKW_R4 8.2 +6 Z_SKW_R5 8.2 //
        @3 "KURTOSIS : COEFFICIENT"
        @35 KRT_R0 8.2 +6 KRT_R1 8.2 +6 KRT_R2 8.2 +6 KRT_R3 8.2
        +6 KRT_R4 8.2 +6 KRT_R5 8.2 /
        @18 "Z"
        @35 Z_KRT_R0 8.2 +6 Z_KRT_R1 8.2 +6 Z_KRT_R2 8.2 +6 Z_KRT_R3 8.2
        +6 Z_KRT_R4 8.2 +6 Z_KRT_R5 8.2 /;
    SET _NORML;
    PUT @3 "NORMALITY : P "
        @32 PNORM_0 $CHAR11. +3 PNORM_1 $CHAR11. +3 PNORM_2 $CHAR11.
        +3 PNORM_3 $CHAR11. +3 PNORM_4 $CHAR11. +3 PNORM_5 $CHAR11. //
        @3 "COMMON VARIANCE :" ;
    SET _P_SPEAR;
    PUT @4 "P VALUE FROM SPEARMANS R"
        @35 P_R0 8.3 +6 P_R1 8.3 +6 P_R2 8.3 +6 P_R3 8.3 +6 P_R4 8.3
        +6 P_R5 8.3/
        @6 "USING ABSOLUTE RESIDUALS" /;
    SET __LEV_P;
    PUT @4 "P VALUE USING MODIFIED"
        @35 LEVP0 8.3 +6 LEVP1 8.3 +6 LEVP2 8.3 +6 LEVP3 8.3 +6 LEVP4 8.3
        +6 LEVP5 8.3 /
        @6 "LEVENE TEST";
    STOP;

  /* ---- page 2: raw points overlaid with the interval means ---- */
  DATA _ME;
    SET _MEANS;
    IF GROUP=' ' | GROUP=' TOTAL' THEN DELETE;
  DATA _PLOT;
    MERGE _DSNSAV _ME;
  PROC PLOT DATA=_PLOT;
    PLOT Y*X MN_Y*MN_X="*"/OVERLAY;
    LABEL Y=%UNQUOTE(%QUOTE(%'&YVAR%'))
          X=%UNQUOTE(%QUOTE(%'&XVAR%'));
%END;
%ELSE %DO;
  PUT 'INSUFFICIENT NON-MISSING OBSERVATIONS (N<6)';
%END;
%ENDUP:
OPTIONS NODQUOTE;
%MEND REGRESS;